###############################
# Analysis of conjoint data 
###############################

rm(list=ls())

# load packages 
library(foreign)
library(lmtest)
library(sandwich)
library(stargazer)
library(msm)
library(Hmisc)

# load function that does clustered SEs
vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

##############################
# Prepare data
##############################

# load data
d = read.dta("~/Dropbox/Cerrillos 2017/08_replication/conjoint_cerrillos_LAPS.dta")
table(d$failcon)
names(d)
dim(d)

# check data
d$idnum = as.numeric(d$idnum)
d$failcon[is.na(d$failcon)]=0
table(d$failcon, exclude = NULL) 
prop.table(table(d$failcon, exclude = NULL))
table(d$enumerator)
table(d$selected, exclude = NULL)

# drop failcon and non-responses
d = subset(d, d$selected!=99)
d = subset(d, d$selected!=88)
table(d$selected, exclude = NULL)
table(d$failcon, exclude = NULL)

# Vote past election
table(d$a11)
d$votepastelection = 0
d$votepastelection[d$a11==1]=1
table(d$votepastelection)
round(prop.table(table(d$votepastelection)),2)

# Vote next election
d$a10 = as.numeric(d$a10)
table(d$a10, exclude = NULL)
d$votenextelection = 0
d$votenextelection[d$a10<7]=1
table(d$votenextelection)
round(prop.table(table(d$votenextelection)),2)

d$novotenextelection = 1 - d$votenextelection
table(d$novotenextelection)
round(prop.table(table(d$novotenextelection)),2)

# Name last election
table(d$a12)
d$namepastelection = 0
d$namepastelection[d$a12==1]=1
d$namepastelection[d$a12==2]=1
d$namepastelection[d$a12==3]=1
d$namepastelection[d$a12==4]=1
d$namepastelection[d$a12==5]=1
table(d$namepastelection)
prop.table(table(d$namepastelection))

# Interest in politics
table(d$a3, exclude = NULL)
d$interest = 0
d$interest[d$a3<4]=1
table(d$interest)
prop.table(table(d$interest))

# Likely voter 1
d$likely1 = 0
d$likely1[d$interest==1 & d$votepastelection==1 & d$votenextelection==1]=1
prop.table(table(d$likely1))
d$nolikely1 = 1 - d$likely1

# Likely voter 2
d$likely2 = 0
d$likely2[d$votepastelection==1 & d$votenextelection==1]=1
prop.table(table(d$likely2))
d$nolikely2 = 1 - d$likely2

# Likely voter 3
d$likely3 = 0
d$likely3[d$votepastelection==1 & d$interest==1]=1
prop.table(table(d$likely3))
d$nolikely3 = 1 - d$likely3

# ideology and likely
table(d$a5,d$likely1)

# prepare samples
table(d$a5, exclude = NULL)

d_left = d[d$a5<5,]
table(d_left$a5)

d_right = d[d$a5>6 & d$a5<11,]
table(d_right$a5)

d_center = d[d$a5>4 & d$a5<7,]
table(d_center$a5)

d_noideo = d[d$a5==88 | d$a5==99,]
table(d_noideo$a5)

# check observations
table(d_left$likely1)
table(d_right$likely1)
table(d_center$likely1)
table(d_noideo$likely1)

##############################
# Interaction likely voters 2
##############################

# Left
describe(d_left$idnum)
left_likely2 <- lm(d_left$outcome ~ d_left$atideology + d_left$atprofession + d_left$atage + 
                     d_left$likely2 + 
                     d_left$atideology*d_left$likely2 + d_left$atprofession*d_left$likely2 + d_left$atage*d_left$likely2)
left_likely2_c <- round(coeftest(left_likely2, vcov = vcovCluster(left_likely2, cluster = d_left$idnum)),4)
left_likely2_c

# Right
describe(d_right$idnum)
right_likely2 <- lm(d_right$outcome ~ d_right$atideology + d_right$atprofession + d_right$atage + 
                      d_right$likely2 +
                      d_right$atideology*d_right$likely2 + d_right$atprofession*d_right$likely2 + d_right$atage*d_right$likely2)
right_likely2_c <- round(coeftest(right_likely2, vcov = vcovCluster(right_likely2, cluster = d_right$idnum)),4)
right_likely2_c

# Center
describe(d_center$idnum)
center_likely2 <- lm(d_center$outcome ~ d_center$atideology + d_center$atprofession + d_center$atage +
                       d_center$likely2 +
                       d_center$atideology*d_center$likely2 + d_center$atprofession*d_center$likely2 + d_center$atage*d_center$likely2)
center_likely2_c <- round(coeftest(center_likely2, vcov = vcovCluster(center_likely2, cluster = d_center$idnum)),4)
center_likely2_c

# No ideology
describe(d_noideo$idnum)
noideo_likely2 <- lm(d_noideo$outcome ~ d_noideo$atideology + d_noideo$atprofession + d_noideo$atage + 
                       d_noideo$likely2 + 
                       d_noideo$atideology*d_noideo$likely2 + d_noideo$atprofession*d_noideo$likely2 + d_noideo$atage*d_noideo$likely2) 
noideo_likely2_c <- round(coeftest(noideo_likely2, vcov = vcovCluster(noideo_likely2, cluster = d_noideo$idnum)),4)
noideo_likely2_c

##################################
# Interaction no likely voters 2
##################################

# Left
describe(d_left$idnum)
left_nolikely2 <- lm(d_left$outcome ~ d_left$atideology + d_left$atprofession + d_left$atage + 
                       d_left$nolikely2 + 
                       d_left$atideology*d_left$nolikely2 + d_left$atprofession*d_left$nolikely2 + d_left$atage*d_left$nolikely2)
left_nolikely2_c <- round(coeftest(left_nolikely2, vcov = vcovCluster(left_nolikely2, cluster = d_left$idnum)),4)
left_nolikely2_c

# Right
describe(d_right$idnum)
right_nolikely2 <- lm(d_right$outcome ~ d_right$atideology + d_right$atprofession + d_right$atage + 
                        d_right$nolikely2 +
                        d_right$atideology*d_right$nolikely2 + d_right$atprofession*d_right$nolikely2 + d_right$atage*d_right$nolikely2)
right_nolikely2_c <- round(coeftest(right_nolikely2, vcov = vcovCluster(right_nolikely2, cluster = d_right$idnum)),4)
right_nolikely2_c

# Center
describe(d_center$idnum)
center_nolikely2 <- lm(d_center$outcome ~ d_center$atideology + d_center$atprofession + d_center$atage +
                         d_center$nolikely2 +
                         d_center$atideology*d_center$nolikely2 + d_center$atprofession*d_center$nolikely2 + d_center$atage*d_center$nolikely2)
center_nolikely2_c <- round(coeftest(center_nolikely2, vcov = vcovCluster(center_nolikely2, cluster = d_center$idnum)),4)
center_nolikely2_c

# No ideology
describe(d_noideo$idnum)
noideo_nolikely2 <- lm(d_noideo$outcome ~ d_noideo$atideology + d_noideo$atprofession + d_noideo$atage + 
                         d_noideo$nolikely2 + 
                         d_noideo$atideology*d_noideo$nolikely2 + d_noideo$atprofession*d_noideo$nolikely2 + d_noideo$atage*d_noideo$nolikely2) 
noideo_nolikely2_c <- round(coeftest(noideo_nolikely2, vcov = vcovCluster(noideo_nolikely2, cluster = d_noideo$idnum)),4)
noideo_nolikely2_c

#####################
# Prepare plots
#####################

# left no likely2
left_likely2_c # regression results
coeff = left_likely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = left_likely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/001_left_nolikely2.txt", sep="\t")

# left likely2
left_nolikely2_c # regression results
coeff = left_nolikely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = left_nolikely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/002_left_likely2.txt", sep="\t")

# left difference
left_likely2_c # regression results
coeff = left_likely2_c [8:12,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = left_likely2_c [8:12,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
#results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/003_left_likely2_difference.txt", sep="\t")

# right no likely2
right_likely2_c # regression results
coeff = right_likely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = right_likely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/004_right_nolikely2.txt", sep="\t")

# right likely2
right_nolikely2_c # regression results
coeff = right_nolikely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = right_nolikely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/005_right_likely2.txt", sep="\t")

# right difference
right_likely2_c # regression results
coeff = right_likely2_c [8:12,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = right_likely2_c [8:12,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
#results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/006_right_likely2_difference.txt", sep="\t")

# center no likely2
center_likely2_c # regression results
coeff = center_likely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = center_likely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/007_center_nolikely2.txt", sep="\t")

# center likely2
center_nolikely2_c # regression results
coeff = center_nolikely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = center_nolikely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/008_center_likely2.txt", sep="\t")

# center difference
center_likely2_c # regression results
coeff = center_likely2_c [8:12,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = center_likely2_c [8:12,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
#results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/009_center_likely2_difference.txt", sep="\t")

# noideo no likely2
noideo_likely2_c # regression results
coeff = noideo_likely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = noideo_likely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/010_noideo_nolikely2.txt", sep="\t")

# noideo likely2
noideo_nolikely2_c # regression results
coeff = noideo_nolikely2_c [1:6,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = noideo_nolikely2_c [1:6,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
results$y1 = 0  # no cases for likely voters
results$r1 = 0 # no cases for likely voters
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/011_noideo_likely2.txt", sep="\t")

# noideo difference
noideo_likely2_c # regression results
coeff = noideo_likely2_c [8:12,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = noideo_likely2_c [8:12,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
#results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
results$y1 = 0  # no cases for likely voters
results$r1 = 0 # no cases for likely voters
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/012_noideo_likely2_difference.txt", sep="\t")
